In [1]:
import numpy as np
In [2]:
classnum = np.arange(1, 25)
classnum = np.delete(classnum, np.where(classnum == 20))
classnum
Out[2]:
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 21, 22, 23, 24])
In [3]:
np.random.choice(classnum, size = 3, replace = False)
Out[3]:
array([ 2, 6, 14])
In [4]:
np.random.choice(classnum, size = 1, replace = False)
Out[4]:
array([12])
In [5]:
# import plotly.io as pio
# pio.renderers.default='notebook'
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
Plotly¶
In [6]:
import pandas as pd
df = pd.DataFrame(data={'Time':[1100, 1110, 1120, 1130, 1140, 1150],
'Temperature':[30.0, 30.05, 30.06, 30.1, 30.14, 30.23]})
df
Out[6]:
| Time | Temperature | |
|---|---|---|
| 0 | 1100 | 30.00 |
| 1 | 1110 | 30.05 |
| 2 | 1120 | 30.06 |
| 3 | 1130 | 30.10 |
| 4 | 1140 | 30.14 |
| 5 | 1150 | 30.23 |
In [7]:
import plotly.express as px
fig = px.scatter(df, x="Time", y="Temperature", trendline="ols")
fig.show()
Calculate the value at 1200¶
In [8]:
0.00417143 * 1200 + 25.4038
Out[8]:
30.409516
In [ ]:
Seaborn¶
In [9]:
df
Out[9]:
| Time | Temperature | |
|---|---|---|
| 0 | 1100 | 30.00 |
| 1 | 1110 | 30.05 |
| 2 | 1120 | 30.06 |
| 3 | 1130 | 30.10 |
| 4 | 1140 | 30.14 |
| 5 | 1150 | 30.23 |
In [10]:
import seaborn as sns
sns.regplot(data = df, x="Time", y="Temperature" )
Out[10]:
<Axes: xlabel='Time', ylabel='Temperature'>
In [ ]:
Matplotlib¶
In [11]:
yearmonth = pd.Index(['2024-03','2024-04','2024-05','2024-06','2024-07','2024-08'])
actualorder = pd.Series([2305, 2108, 2375, 2422, 2493, 2532], index=yearmonth)
predictorder = pd.Series([2355, 2100, 2365, 2445, 2499, 2565], index=yearmonth)
In [12]:
df = pd.DataFrame(data={
'ActualOrder': actualorder,
'PredictOrder': predictorder
})
df
Out[12]:
| ActualOrder | PredictOrder | |
|---|---|---|
| 2024-03 | 2305 | 2355 |
| 2024-04 | 2108 | 2100 |
| 2024-05 | 2375 | 2365 |
| 2024-06 | 2422 | 2445 |
| 2024-07 | 2493 | 2499 |
| 2024-08 | 2532 | 2565 |
In [13]:
import matplotlib.pyplot as plt
plt.plot(yearmonth, actualorder, label='Actual Order')
plt.plot(yearmonth, predictorder, '--', label='Predict Order')
plt.ylabel('Order in month')
plt.xlabel('2024 Order Forecast')
plt.grid(True)
plt.legend()
plt.title('Order Forecast vs Actual Order as of Aug 2024')
Out[13]:
Text(0.5, 1.0, 'Order Forecast vs Actual Order as of Aug 2024')
Alternative Answer¶
In [14]:
import matplotlib.pyplot as plt
plt.plot(df.ActualOrder, label='Actual Order')
plt.plot(df.PredictOrder, '--', label='Predict Order')
plt.ylabel('Order in month')
plt.xlabel('Fiscal Month')
plt.grid(True)
plt.legend()
plt.title('Order Forecast vs Actual Order as of Aug 2024')
Out[14]:
Text(0.5, 1.0, 'Order Forecast vs Actual Order as of Aug 2024')
Alternative in Seaborn¶
In [15]:
import warnings
warnings.filterwarnings("ignore")
sns.lineplot(df)
plt.grid(True)
plt.title('Order Forecast vs Actual Order as of Aug 2024')
Out[15]:
Text(0.5, 1.0, 'Order Forecast vs Actual Order as of Aug 2024')
Alternative in Plotly¶
In [16]:
px.line(df)
In [ ]:
Numpy¶
In [17]:
arr2D = np.array([[2, 3],
[4, 5],
[6, 7]
], dtype=np.float64)
arr2D
Out[17]:
array([[2., 3.],
[4., 5.],
[6., 7.]])
In [18]:
arr2D * 2.1
Out[18]:
array([[ 4.2, 6.3],
[ 8.4, 10.5],
[12.6, 14.7]])
In [19]:
arr2D.shape
Out[19]:
(3, 2)
In [20]:
arr2D > 5
Out[20]:
array([[False, False],
[False, False],
[ True, True]])
In [ ]:
In [ ]:
Pandas¶
In [21]:
import pandas as pd
yearmonth = pd.Index(['2024-03','2024-04','2024-05','2024-06','2024-07','2024-08'])
actualorder = pd.Series([2305, 2108, 2375, 2422, 2493, 2532], index=yearmonth)
predictorder = pd.Series([2355, 2100, 2365, 2445, 2499, 2565], index=yearmonth)
In [22]:
df = pd.DataFrame(data={
'ActualOrder': actualorder,
'PredictOrder': predictorder
})
df
Out[22]:
| ActualOrder | PredictOrder | |
|---|---|---|
| 2024-03 | 2305 | 2355 |
| 2024-04 | 2108 | 2100 |
| 2024-05 | 2375 | 2365 |
| 2024-06 | 2422 | 2445 |
| 2024-07 | 2493 | 2499 |
| 2024-08 | 2532 | 2565 |
In [23]:
df.loc[(df.index>='2024-06') & (df.ActualOrder>2450)]
Out[23]:
| ActualOrder | PredictOrder | |
|---|---|---|
| 2024-07 | 2493 | 2499 |
| 2024-08 | 2532 | 2565 |
In [24]:
df.loc[df.index=='2024-04', 'ActualOrder'] = 2018
df
Out[24]:
| ActualOrder | PredictOrder | |
|---|---|---|
| 2024-03 | 2305 | 2355 |
| 2024-04 | 2018 | 2100 |
| 2024-05 | 2375 | 2365 |
| 2024-06 | 2422 | 2445 |
| 2024-07 | 2493 | 2499 |
| 2024-08 | 2532 | 2565 |
In [ ]:
In [ ]:
All¶
In [25]:
detailOrder = pd.DataFrame(data={
'OrderDate' : ['2024-09-01','2024-09-01', '2024-09-03', '2024-09-01'],
'OrderID' : [1002, 1002, 1005, 1002],
'ProductID' : [111, 222, 555, 333],
'Quantity' : [3, 2, 1, 4],
'ClientID' : [888, 888, 999, 888],
})
detailOrder
Out[25]:
| OrderDate | OrderID | ProductID | Quantity | ClientID | |
|---|---|---|---|---|---|
| 0 | 2024-09-01 | 1002 | 111 | 3 | 888 |
| 1 | 2024-09-01 | 1002 | 222 | 2 | 888 |
| 2 | 2024-09-03 | 1005 | 555 | 1 | 999 |
| 3 | 2024-09-01 | 1002 | 333 | 4 | 888 |
In [26]:
products = pd.DataFrame(data={
'ProductID': [111, 222, 333, 444, 555],
'ProductName': ['Python 101', 'Machine Learn ABC', 'Data Science Deep Dive',
'Neural Network', 'Happiness in Programming']
})
products
Out[26]:
| ProductID | ProductName | |
|---|---|---|
| 0 | 111 | Python 101 |
| 1 | 222 | Machine Learn ABC |
| 2 | 333 | Data Science Deep Dive |
| 3 | 444 | Neural Network |
| 4 | 555 | Happiness in Programming |
In [27]:
clientInfo = pd.DataFrame(data={
'ClientID' : [666, 777, 888, 999 ],
'Name' : ['Alice', 'Bob', 'Charlie', 'Dave'],
'Address' : ['23 Abs Street, Kowloon',
'37 Bon Road, NT',
'86 Caine Lane, Kowloon',
'72 Doe Avenue, Hong Kong'],
'Phone' : [234567, 456789, 567891, 789123],
})
clientInfo
Out[27]:
| ClientID | Name | Address | Phone | |
|---|---|---|---|---|
| 0 | 666 | Alice | 23 Abs Street, Kowloon | 234567 |
| 1 | 777 | Bob | 37 Bon Road, NT | 456789 |
| 2 | 888 | Charlie | 86 Caine Lane, Kowloon | 567891 |
| 3 | 999 | Dave | 72 Doe Avenue, Hong Kong | 789123 |
In [28]:
packageDF = detailOrder.merge(clientInfo, on='ClientID')
packageDF
Out[28]:
| OrderDate | OrderID | ProductID | Quantity | ClientID | Name | Address | Phone | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2024-09-01 | 1002 | 111 | 3 | 888 | Charlie | 86 Caine Lane, Kowloon | 567891 |
| 1 | 2024-09-01 | 1002 | 222 | 2 | 888 | Charlie | 86 Caine Lane, Kowloon | 567891 |
| 2 | 2024-09-01 | 1002 | 333 | 4 | 888 | Charlie | 86 Caine Lane, Kowloon | 567891 |
| 3 | 2024-09-03 | 1005 | 555 | 1 | 999 | Dave | 72 Doe Avenue, Hong Kong | 789123 |
In [29]:
detailOrder.merge(clientInfo, how='left', on='ClientID')
Out[29]:
| OrderDate | OrderID | ProductID | Quantity | ClientID | Name | Address | Phone | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2024-09-01 | 1002 | 111 | 3 | 888 | Charlie | 86 Caine Lane, Kowloon | 567891 |
| 1 | 2024-09-01 | 1002 | 222 | 2 | 888 | Charlie | 86 Caine Lane, Kowloon | 567891 |
| 2 | 2024-09-03 | 1005 | 555 | 1 | 999 | Dave | 72 Doe Avenue, Hong Kong | 789123 |
| 3 | 2024-09-01 | 1002 | 333 | 4 | 888 | Charlie | 86 Caine Lane, Kowloon | 567891 |
In [30]:
shipping = packageDF.merge(products, on='ProductID')[['OrderID', 'Name', 'Address', 'Phone', 'ProductName', 'Quantity']]
shipping
Out[30]:
| OrderID | Name | Address | Phone | ProductName | Quantity | |
|---|---|---|---|---|---|---|
| 0 | 1002 | Charlie | 86 Caine Lane, Kowloon | 567891 | Python 101 | 3 |
| 1 | 1002 | Charlie | 86 Caine Lane, Kowloon | 567891 | Machine Learn ABC | 2 |
| 2 | 1002 | Charlie | 86 Caine Lane, Kowloon | 567891 | Data Science Deep Dive | 4 |
| 3 | 1005 | Dave | 72 Doe Avenue, Hong Kong | 789123 | Happiness in Programming | 1 |
In [31]:
packageDF.merge(products, on='ProductID').drop(['ProductID','ClientID','OrderDate'], axis=1)
Out[31]:
| OrderID | Quantity | Name | Address | Phone | ProductName | |
|---|---|---|---|---|---|---|
| 0 | 1002 | 3 | Charlie | 86 Caine Lane, Kowloon | 567891 | Python 101 |
| 1 | 1002 | 2 | Charlie | 86 Caine Lane, Kowloon | 567891 | Machine Learn ABC |
| 2 | 1002 | 4 | Charlie | 86 Caine Lane, Kowloon | 567891 | Data Science Deep Dive |
| 3 | 1005 | 1 | Dave | 72 Doe Avenue, Hong Kong | 789123 | Happiness in Programming |
In [32]:
products[products['ProductName'].str.contains('Happiness')]
Out[32]:
| ProductID | ProductName | |
|---|---|---|
| 4 | 555 | Happiness in Programming |
In [33]:
studentIQ = np.array([94, 70, 72, 90, 95, 99, 103, 81, 80, 75, 102, 110, 130, 80, 83, 115, 86, 92, 99, 78, 108, ])
studentIQ
Out[33]:
array([ 94, 70, 72, 90, 95, 99, 103, 81, 80, 75, 102, 110, 130,
80, 83, 115, 86, 92, 99, 78, 108])
In [34]:
studentIQ.mean()
Out[34]:
92.47619047619048
In [35]:
np.mean(studentIQ)
Out[35]:
92.47619047619048
In [36]:
studentIQ.std()
Out[36]:
15.085846560266656
In [37]:
np.std(studentIQ)
Out[37]:
15.085846560266656
In [38]:
studentIQ.min()
Out[38]:
70
In [39]:
np.min(studentIQ)
Out[39]:
70
In [42]:
np.count_nonzero(studentIQ > 90)
Out[42]:
11
In [54]:
np.where(studentIQ>90,1,0).sum()
Out[54]:
11
In [55]:
np.sum(studentIQ>90)
Out[55]:
11
In [56]:
# import warnings
# warnings.filterwarnings("ignore")
sns.histplot(studentIQ, kde=True)
Out[56]:
<Axes: ylabel='Count'>
In [ ]: